*WIC co-op eWIC new diff in diff analysis only for stores in ZIPs that we see redemptions for
*last modified: 28 April 2025
*last modified by: Charlotte Ambrozek
*this do file implements the csdid2 package to estimate individual 2x2 doubly robust did estimates and aggregate to obtain an att 

clear all
set more off
set rmsg on

set maxvar 120000
set emptycells drop

local data_dir ./data/cleaned
local raw_dir ./data/raw
local out_dir ./analysis/output
local graph_dir ./analysis/output/graphs
local tab_dir ./analysis/output/tables
local log_dir ./documentation/logs
local date: display %tdYY-NN-DD date(c(current_date), "DMY")
di "`date'"
capture log close

log using `log_dir'/ewic_csdid_tip_4zips`date', replace

*import cleaned eWIC implementation info into own frame
*eWIC implementation variable is at county fips/fiscal year level 
*partition data into state/treatment year groups

*import cleaned ewic implementation info into own frame
mkf ewic_fy
frame ewic_fy{
	use ./data/cleaned/ewic_rollout.dta, clear
	rename year fiscalyear
	drop pre* post* 
}

mkf income
frame income{
	use ./data/cleaned/countyyearmedianincome.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

*import two separate cleaned redemptions datasets (second is balanced to only include ZIPs that have non-missing redemptions in all years)
mkf red_fy
frame red_fy{
	use ./data/cleaned/tip_wic_redemptions_05_18, clear
	rename (wic_redemptions) (wicred)
}

mkf red_bal_fy
*load balanced redemptions as an attempted fix
frame red_bal_fy{
	use ./data/cleaned/tip_wic_bal_redemptions_05_18, clear
	rename (wic_redemptions) (wicbalred)
}

mkf countychars
frame countychars{
	use ./data/cleaned/countystats.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

mkf demos
frame demos{
	use ./data/cleaned/demos.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

cwf ewic_fy
frame put ct_fips ev_year state, into(timings)

cwf timings
duplicates drop
encode state, generate(state_code)

frlink m:1 ct_fips, frame(countychars) generate(countylink)
frget medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(countylink)
frlink m:1 ct_fips, frame(demos) generate(demoslink)
frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5, from(demoslink)
gen log_pop = log(total_pop)

*import TIP store data into own frame
mkf tip_fy
frame tip_fy{
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear
	*drop "Direct Distribution Center" and "Home Food Delivery Contractor" types
	gen f = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) | (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bys tip_id: egen flag = mode(f)
	bys tip_id: replace vendor_type1 = vendor_type1[_n+1] if flag == 0 & f == 1
	drop if flag == 1
	drop flag f tip_year_id
	*fill in gaps in state fips using modes
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace 
	
	*drop Vermont and mississippi because of different regiemes prior to eWIC
	*drop NV because ewic turn on then off then on again and we don't have reliable timings
	*drop missing st_fips
	drop if st_fips == 50 | st_fips == 32 | st_fips == 28 | missing(st_fips)

	*flag stores that specialize in WIC
	gen a50 = vendor_type1 == 1 | vendor_type1 == 7 | vendor_type2 == 1 | vendor_type2 == 7
	
	*link in low income/high poverty info
	frlink m:1 ct_fips, frame(income) generate(inc_link)
	frget lowinc highpov highwicelig, from(inc_link)

	*link in controls from timings frame
	frlink m:1 ct_fips, frame(timings) generate(timings_link)
	frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5 medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(timings_link)

    *link in unbalanced redemptions info
    frlink m:1 zip fiscalyear, frame(red_fy) generate(red_link)
	frget wicred, from(red_link)

    *link in balanced redemptions info
    frlink m:1 zip fiscalyear, frame(red_bal_fy) generate(red_bal_link)
	frget wicbalred, from(red_bal_link)

	*link ewic implementation info
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year , from(ewic_link)
	* assume treated at earliest exposure
	bys tip_id: egen ey_min = min(ev_year)
	
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	*assert no dups at future level of xtset 
	duplicates report tip_id fiscalyear 
	assert r(N) == r(unique_value)
	xtset tip_id fiscalyear
	compress

    *this zip has multiple implementation dates and can't be reconciled
	drop if zip == 42223

	unique tip_id 
	* number of unique values of tip id is 91012

    * only keep tip ids in zip codes that have redemptions info
    drop if missing(wicred)
	unique tip_id
	*Number of unique values of tip_id is  29689
}

cwf tip_fy
frame drop ewic_fy

mkf csdidsimple float(b se pval ll ul) str25(outcome subsample state controls)
mkf csdidevent float(b se pval ll ul) int(ev_yr) str25(outcome subsample state controls)
mkf csdideventavg float(avgb avgse avgpval avgll avgul) int(times) str25(time outcome subsample state controls)
mkf csdidgroup float(b se pval ll ul) str25(gr_yr outcome subsample state controls)

cwf tip_fy
cd `out_dir'
csdid auth , ivar(tip_id) time(fiscalyear) gvar(ev_year) notyet long2 saverif(csdid_tip_4zips_auth) replace cluster(st_fips)
mkf post
cwf post
use csdid_tip_4zips_auth, clear
csdid_stats simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("auth") ("all") ("all") ("no")
csdid_stats event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("auth") ("all") ("all") ("no")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("auth") ("all") ("all") ("no")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("auth") ("all") ("all") ("no")
}
csdid_stats group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
di "`names'"
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("auth") ("all") ("all") ("no")
}

cwf tip_fy
frame put _all if chain == 1, into(chain)
cwf chain

cd `out_dir'
csdid auth, ivar(tip_id) time(fiscalyear) gvar(ev_year) notyet long2 saverif(csdid_tip_4zips_authchain) replace cluster(st_fips)
cwf post
use csdid_tip_4zips_authchain, clear
csdid_stats simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("auth") ("chain") ("all") ("no")
csdid_stats event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("auth") ("chain") ("all") ("no")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("auth") ("chain") ("all") ("no")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("auth") ("chain") ("all") ("no")
}
csdid_stats group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("auth") ("chain") ("all") ("no")
}

cwf tip_fy
frame put _all if chain == 0, into(indep)
cwf indep
frame drop chain
* indepedent store estimation
cd `out_dir'
csdid auth, ivar(tip_id) time(fiscalyear) gvar(ev_year) notyet long2 saverif(csdid_tip_4zips_authindep) replace cluster(st_fips)
cwf post 
use csdid_tip_4zips_authindep, clear
csdid_stats simple
frame post csdidsimple (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("auth") ("indep") ("all") ("no")
csdid_stats event, window(-4 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("auth") ("indep") ("all") ("no")
frame post csdideventavg (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("auth") ("indep") ("all") ("no")
local i = 2
forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("auth") ("indep") ("all") ("no")
}
csdid_stats group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("auth") ("indep") ("all") ("no")
}

frame csdideventavg{
	expand times, generate(new)
	bys time outcome subsample controls (new): gen ev_yr = sum(new)
	replace ev_yr = ev_yr - 4 if time == "pre"
	drop time new times
}

cwf csdidevent
frlink 1:1 ev_yr outcome subsample controls, frame(csdideventavg)
frget avgb avgse avgpval avgll avgul, from(csdideventavg)
drop csdideventavg

frame drop csdideventavg

cwf csdidsimple
/* frame drop highwiceligindep */
save `out_dir'/csdid_tip_4zips_simple.dta, replace
cwf csdidevent
save `out_dir'/csdid_tip_4zips_event.dta, replace  
cwf csdidgroup
save `out_dir'/csdid_tip_4zips_group.dta, replace  
log close 
